# setup ---------------------------------------------
library(tidyverse)
library(here)
library(ggiraph)
library(patchwork)
library(ggtext)
library(janitor)
ggplot2::theme_set(theme_bw() + theme(
legend.text = element_markdown(family = 'IBM Plex Sans', colour = 'gray30'),
axis.text = element_markdown(family = 'IBM Plex Sans', colour = 'gray30'),
axis.title = element_markdown(family = 'IBM Plex Sans',colour = 'gray30'),
))
hrbrthemes::import_plex_sans()
source(here('R/report_plots.R'))
# Functions -----------------------------------------
convert_cq_to_copies <- function(cq) 10 ** ((40.1 - cq) / 3.23)
# creates yyyy-ww label for grouping data
get_date_week <- function(x){
y <- lubridate::year(x)
w <- lubridate::week(x) |> str_pad(2, 'left', 0)
str_glue("{y}-{w}")
}
# reverses get_date_week to Weds date during week
week_to_date <- function(year_week){
year <- str_extract(year_week, '^....')
d <- str_glue('{year}-01-01') |> as_date()
wk <- str_extract(year_week, '..$') |> as.integer()
ydays <- if_else(
year == '2021',
days(7*wk - 2),
days(7*wk - 3),
)
return(d + ydays)
}
# get biweekly bin date for vector of dates
get_date_biweekly <- function(x){
day_of_week <- lubridate::wday(x)
case_when(
day_of_week %in% 1:3 ~ x + 3 - day_of_week,
TRUE ~ x + 5 - day_of_week,
)
}
# dates 2021-2022 with biweekly groups
biweekly_date_sequence <- function(){
tibble(
date = seq(
as_date('2021-01-01'),
as_date('2022-12-31'),
by = 1)
) |>
mutate(biweek = get_date_biweekly(date))
}
# lookup table for date
week_midpoint_date_lookup <- function(start = '2021-01-01', end = '2023-01-01'){
ends = lubridate::as_date(c(start, end))
tibble(
date = seq(ends[1], ends[2], by = 1),
date_week = lubridate::week(date) |> str_pad(2, 'left', '0'),
date_year = lubridate::year(date),
week = str_glue("{date_year}-{date_week}"),
) |>
group_by(week) |>
summarise(date = mean(date))
}
# convert site1/site2 to hi/low traffic
get_traffic_level <- function(location){
if_else(
str_detect(location, 'Site 1'),
'high traffic',
'low traffic'
)
}
binom_ci <- function(x, n){
Hmisc::binconf(x, n, method = 'wilson', alpha = 0.05) |>
as_tibble() |>
janitor::clean_names()
}
get_binom_ci <- function(data){
data |>
summarise(
x = sum(pcr_positive == 'Positive'),
n = n(),
binconf = map2(x, n, ~binom_ci(.x, .y))
) |>
unnest(binconf) |>
mutate(across(c(point_est, lower, upper), ~(.x*100) |> round(1)))
}
# Plot Elements ---------------------------------------
theme_color <- 'cornflowerblue'
# layout x-axis
scale_x_study_dates <- function(){
scale_x_date(
date_breaks = 'month',
date_labels = month.name[c(9:12, 1:5)] |> strtrim(3) |>
paste0(' ', c(rep(2021, 4), rep(2022, 5))),
limits = c(
min(swabs_tidy$date_swab),
max(swabs_tidy$date_swab)
))
}
# get rid of x-axis for vertically stacked plots
theme_no_x_labels <- function(){
theme(
axis.ticks.x = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank()
)
}
# get rid of y-axis for horizontally stacked plots
theme_no_y_labels <- function(){
theme(
axis.ticks.y = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank()
)
}
# no minor grid and adjust spacing for multipanel
theme_remove_grid <- function(){
theme(
axis.title.y = element_blank(),
panel.grid.minor = element_blank(),
plot.title.position = 'panel',
plot.title = element_markdown(
vjust = -1,
size = 8,
face = 'bold',
family = 'IBM Plex Sans',
colour = 'gray50',
margin = margin(0, 0, 0, 0)),
plot.margin = unit(c(0, 0, 0, 0), 'mm'))
}
# Data ------------------------------------------------
# swab data
swabs <-
read_rds(here('data/cube.rds')) |>
filter(str_detect(site, '^UO_'))
# filtered swabs without controls or sponge samples
swabs_tidy <- swabs |>
filter(!negative_control,
swab_type != 'sponge',
date_swab < '2022-04-27') |>
select(date_swab, site, floor, location, pcr_positive, pcr_ct, co2) |>
mutate(biweek = get_date_biweekly(date_swab))
# lookup table for waste water site names
lookup_ww <- tribble(
~site_abbr, ~site_name,
'UO_AA', 'Annex Residence',
'UO_FA', 'Faculty of Social Sciences',
'UO_FT', 'Friel Residence',
'UO_NA', 'Northern sampling site',
'UO_RGN', 'Roger Guindon Hall',
'UO_ST', 'Southern sampling site',
'UO_TBT', 'Tabaret Hall'
)
# UOttawa waste water data from R. Delatolla to clean
clean_ww_excel <- function(){
readxl::read_excel(here::here('data/ww_university.xlsx')) |>
janitor::clean_names() |>
transmute(
sample_date = as_date(sample_date),
site = source_name,
signal = viral_copies_per_copies_pep_avg
) |>
mutate(
site = str_remove_all(
site, 'Data\\s-\\suOttawa\\s-\\s|.xlsx'
) |>
str_to_upper()
) |>
filter(site != 'UO_RGN', !is.na(signal))
}
clean_ww_excel() |>
write_csv(here::here('data/ww_university_clean.csv'))
rm(clean_ww_excel)
# clean WW data close to study period, no RGN site.
uottawa_ww <-
read_csv(here::here('data/ww_university_clean.csv'),
show_col_types = FALSE) |>
filter(
sample_date > '2021-09-15',
sample_date < '2022-05-05'
) |>
left_join(lookup_ww, by = c('site' = 'site_abbr'))
# ottawa wastewater data: daily means
regional_ww <-
read_rds(here('data/ww_ottawa.rds')) |>
select(sample_date, starts_with('cov_')) |>
pivot_longer(contains('cov_')) |>
mutate(target = str_extract(name, 'cov_n.'),
stat = str_extract(name, 'mean|sd'),
) |>
select(-name) |>
pivot_wider(names_from = stat, values_from = value) |>
mutate(.lower = mean - sd, .upper = mean + sd)
# data from university of ottawa
wifi <-
read_rds(here('data/uo_wifi.rds')) |>
filter(date >= min(swabs$date_swab),
date <= max(swabs$date_swab))
# important dates for context
event_dates <-
tribble(
~event, ~start, ~end,
'Reading Week', '2021-10-24', '2021-10-30',
'Holiday Break', '2021-12-22', '2022-01-04',
'Closure', '2022-01-04', '2022-01-31',
'Closure', '2022-02-16', '2022-02-21',
'Reading Week', '2022-02-20', '2022-02-26',
) |>
mutate(across(start:end, as_date))
# UOttawa cases data
# clean data from: source(here('R/01_impute_missing_case_dates.R'))
cases <-
read_rds(here('data/cases_rule_imputed.rds')) |>
select(case, role, case_date, UC:TBT) |>
mutate(biweek = get_date_biweekly(case_date)) |>
nest(associated_sites = UC:TBT)
total_n_cases_study_period <-
cases |>
filter(case_date > '2021-09-20') |>
nrow()
# Summaries -----------------------------------------
#
positivity_summary <- function(swabs){
swabs |>
summarise(
n = n(),
x = sum(pcr_positive == 'Positive', na.rm = F),
positivity = round(100 * x / n, digits = 1),
.groups = 'drop')
}
co2_summary <- function(swabs){
swabs |>
summarise(
n = n(),
co2_vals = list(co2),
co2_mean = mean(co2, na.rm = T),
.groups = 'drop')
}
# overall summary
swab_summary <- swabs_tidy |> positivity_summary()
# site summary
swab_summary_sites <- swabs_tidy |>
group_by(site) |>
positivity_summary()
# high-low traffic summary
swab_summary_location_traffic <-
swabs_tidy |>
mutate(traffic = get_traffic_level(location)) |>
group_by(traffic) |>
positivity_summary()
## Figure 1: data aggregation --------------------------
# uottawa cases - biweekly counts
cases_biweekly <-
cases |>
select(case, biweek, role) |>
group_by(biweek) |>
summarise(cases = n(),
cases_student = sum(role == 'Student'),
cases_staff = sum(role == 'Employee'),
) |>
right_join(
biweekly_date_sequence() |>
filter(biweek < '2022-02-03') |>
distinct(biweek),
by = 'biweek'
) |>
mutate(across(where(is.integer), ~if_else(is.na(.), 0L, .)))
# swabs biweekly summary
swabs_biweekly <-
swabs_tidy |>
group_by(biweek) |>
positivity_summary() |>
left_join(swabs_tidy |> group_by(biweek) |> co2_summary(),
by = join_by(biweek, n))
# swabs biweekly summary by site
swabs_biweekly_by_site <-
swabs_tidy |>
group_by(site, biweek) |>
positivity_summary() |>
left_join(
swabs_tidy |> group_by(site, biweek) |> co2_summary(),
by = join_by(site, biweek, n)
)
# UOttawa ww biweekly means
uottawa_ww_biweekly <-
uottawa_ww |>
mutate(biweek = get_date_biweekly(sample_date)) |>
group_by(biweek) |>
summarise(signal = mean(signal),
n = n())
# daily ww data for study period
regional_ww_daily <-
regional_ww |>
filter(
sample_date >= min(swabs_tidy$date_swab) - 4,
sample_date <= max(swabs_tidy$date_swab) + 4,
) |>
group_by(sample_date) |>
summarise(mean = mean(mean))
# regional ww biweekly means
regional_ww_biweekly <-
regional_ww_daily |>
mutate(biweek = get_date_biweekly(sample_date)) |>
group_by(biweek) |>
summarise(mean = mean(mean))
# wifi site agg
wifi_extended <- read_rds(here('data/uo_wifi.rds')) |>
filter(date >= min(swabs$date_swab) - 2,
date <= max(swabs$date_swab) + 2) |>
mutate(biweek = get_date_biweekly(date))
# overall wifi biweekly timeseries
wifi_biweekly <-
wifi_extended |>
group_by(biweek) |>
summarise(
n = n(),
min = min(clients, na.rm = T),
mean = mean(clients, na.rm = T),
max = max(clients, na.rm = T)
)
# per building wifi biweekly timeseries
wifi_biweek_sites <-
wifi_extended |>
group_by(biweek, site) |>
summarise(
n = n(),
min = min(clients),
mean = mean(clients),
max = max(clients),
.groups = 'drop'
)
# Map --------------------------------------------------
swab_coords <- tribble(
~site, ~name, ~lat, ~lng,
'MRT', 'Morrisette Hall (MRT)', 45.4232391, -75.684289,
'MNT', 'Montpetit', 45.4225417102, -75.6826587146,
'90U', '90 University', 45.422425557, -75.68501449,
'DMS', 'Desmarais Bldg.', 45.4238767934, -75.687270754,
'TBT', 'Tabaret Hall', 45.4245094016, -75.6863190018,
'LPR', 'Louis Pasteur Bldg.', 45.42137566861, -75.6802638999,
)
ww_coords <- tribble(
~site, ~name, ~lat, ~lng,
'aa', 'Annex Residence', 45.4267812902, -75.6803440596,
'fa', 'Faculty of Soc. Sci.', 45.4216247, -75.6839038601,
'ft', 'Friel Residence', 45.4304344008, -75.6829076653,
'tbt', 'Tabaret Hall', 45.4245094016, -75.6863190018
# 'na', 'North sampling Site', NA, NA,
# 'st', 'South Sampling Site', NA, NA, # Nobody knows where this is
)
figure_sites_map <-
leaflet::leaflet(swab_coords, width = 600, height = 330) |>
leaflet::addProviderTiles(
leaflet::providers$CartoDB.Positron
) |>
leaflet::setView(lng = -75.6843, lat = 45.4235, zoom = 14) |>
leaflet::addCircleMarkers(
~lng, ~lat,
label = ~name,
popup = ~name,
radius = 2,
color = '#440099'
) |>
leaflet::addLabelOnlyMarkers(
~lng, ~lat,
label = ~site,
labelOptions = leaflet::labelOptions(
noHide = T, direction = 'top', textOnly = T,
)) |>
leaflet::addCircleMarkers(
~lng,
~lat,
label = ~site,
radius = 2,
color = '#440099'
) |>
leaflet::addCircleMarkers(
data = ww_coords,
~lng,
~lat,
label = ~name,
popup = ~name,
radius = 4,
color = '#f96999'
)
# Results ----------------------------------------------------
rs_data <- lst(
swabs_collected = nrow(swabs_tidy),
positivity_ovr = swab_summary$positivity,
positivity_site_min = min(swab_summary_sites$positivity),
positivity_site_max = max(swab_summary_sites$positivity),
)
rs_abstract <- rs_data |>
glue::glue_data(
"Over the 32-week study period, we collected {swabs_collected} swabs at six university buildings, including two buildings where wastewater surveillance was currently ongoing.
Overall, {positivity_ovr}% of swabs were PCR-positive for SARS-CoV-2, with individual buildings ranging between {positivity_site_min}% and {positivity_site_max}%.
*Comment on when spikes in cases, swabs, and waste-water occurred….* *Comment on prediction of cases using swabs, ww, or combined approach…*
"
)
# rs_results <- rs_data |> glue::glue_data()
## corr -----
# joined data for pairwise correlation
biweekly_data <- swabs_biweekly |>
transmute(biweek, `Swab PCR` = positivity, `CO2` = co2_mean) |>
left_join(
wifi_biweekly |> transmute(biweek, `Wi-Fi` = mean),
by = 'biweek'
) |>
left_join(
uottawa_ww_biweekly |> transmute(biweek, `University WW` = signal),
by = 'biweek'
) |>
left_join(
regional_ww_biweekly |> transmute(biweek, `Ottawa WW` = mean),
by = 'biweek'
) |>
left_join(
cases_biweekly |> transmute(biweek, Cases = cases),
by = 'biweek'
)
corr_spearman <- biweekly_data |>
select(-biweek) |>
corrr::correlate(use = 'pairwise.complete.obs',
method = 'spearman')
# to get pvalues...
corrtest <-
biweekly_data |>
select(-biweek) |>
psych::corr.test(
use = 'pairwise',
method = 'spearman',
adjust = 'none' # See p.adjust for "holm" > "bonferroni").
)
# spearman r and p-values; upper triangle has adj. correlations
corr_r <- corrtest$r |> round(3)
corr_p <- corrtest$p |> round(3)
# remove lower tri with raw probabilities
corr_r[lower.tri(corr_r, diag = T)] <- NA
corr_p[lower.tri(corr_p, diag = T)] <- NA
corr_tbl_r <- kableExtra::kable(
x = corr_r,
format = 'pipe',
caption = 'Campus-wide, biweekly metrics: Spearman r'
)
corr_tbl_p <- kableExtra::kable(
x = corr_p,
format = 'pipe',
caption = 'Campus-wide, biweekly metrics: p-value on Spearman r'
)
corr_ci <- corrtest |>
pluck('ci') |>
as_tibble() |>
mutate(terms = rownames(corrtest$ci), .before = 1L)
rm(corr_r, corr_p, corrtest)